Imports

library(evd); 
library(evdbayes);
library(coda);
library(ismev);
library(xts);

Loading the Data

load("../data/CAPE_Minder_Rychener_Malsot.RData")
load("../data/NINO34.RData")
load("../data/SRH_Minder_Rychener_Malsot.RData")

Generate PROD

prod = sqrt(cape)*srh

** Create Time Series Objects **

dates <- seq.Date(as.Date("1979-1-1"), as.Date("2015-12-31"), by=1)
feb29ix <- format(as.Date(dates), "%m-%d") == "02-29"
dates <- dates[!feb29ix]
prod_ts = xts(prod, order.by = rep(dates, each=8))

Beginning the Analysis

month_names = c("jan","feb","mar","apr","may","jun","jul","aug","sep","oct","nov","dec")
get_monthly = function(x) {
  output = list()
  len = nrow(x)
  
  # Get month of element
  month = time(x)
  month = gsub("....-", "", month)
  month = gsub("-..", "", month)
  monthlist = unique(month)
  for (i in 1:12) {
    output[[i]] = x[month == monthlist[i],]
  }
  names(output) = month_names
  return(output)
}
monthly_max = get_monthly(apply.monthly(prod_ts, max))
r = 2
r_monthly_max = get_monthly(apply.monthly(prod_ts, function(x) order(x, decreasing=T)[1:r]))
# get the monthly maxima of prod
m1 = as.data.frame(apply.monthly(prod_ts, max))$V1;
# produce the gumbel qq plot
gumbel_qq = function(x, title="") qqplot(qgumbel(c(1:length(x))/(length(x)+1)),
                                         x,
                                         main = paste("Gumbell Q-Q Plot", title),
                                         xlab = "Theoretical Quantiles", 
                                         ylab = "Sample Quantiles") ;
gumbel_qq(m1)

#qqplot(qgumbel(c(1:length(m1))/(length(m1)+1)),m1,main = "Gumbell Q-Q Plot",xlab = "Theoretical Quantiles", ylab = "Sample Quantiles") ;

The qq plot doesn’t fit very well, especially in the lower tail. This is likely due to seasonal dependence.

Fitting GEV to the entire data

# fit gevd with MLE and produce diagnostic plots
fitmax.MLE<-fgev(m1,method="Nelder-Mead")
par(mfrow=c(2,2))
fitmax.MLE

Call: fgev(x = m1, method = "Nelder-Mead") 
Deviance: 9272.599 

Estimates
      loc      scale      shape  
7.519e+03  7.013e+03  4.757e-03  

Standard Errors
      loc      scale      shape  
421.90201  331.43749    0.05347  

Optimization Information
  Convergence: successful 
  Function Evaluations: 171 
plot(fitmax.MLE)

Poor fit, probably because the distribution isn’t stationary. This is especially visible in the Probability plot, in which the confidence band is surpassed, indicating a poor fit.

# fit gevd with Bayesian Techniques
# use the previous outputs (rounded) as initial values (use a different shape)
init<-c(1.6e4,4e3,0.1)
# arbitrary priors
mat <- diag(c(10000,10000,100)) 
pn <- prior.norm(mean=c(0,0,0),cov=mat)
# proposal standard deviation (found by trial and error to get 20%<acceptance rate<40%)
psd<-c(500,0.03,0.02)
# draw 3k samples from markov chain
nit = 30000
thinning = 300
post<-posterior(nit, init=init,prior=pn,lh="gev",data=m1,psd=psd)
# diagnostic plots
MCMC<-mcmc(post[1:nit %% thinning == 0, ], thin=300) 
plot(MCMC) 

attr(mcmc(post),"ar")
            mu sigma   xi total
acc.rates 0.24  0.72 0.70  0.55
ext.rates 0.00  0.00 0.01  0.00
#MCMC_stab <- mcmc(post, thin=50, start=1000)
acf(mcmc(post[1:nit %% thinning == 0, ]))

We observe that there seem to be no substantial issues from the autocorrelation plots.

apply(mcmc(post[1:nit %% thinning == 0, ]),2,mean)
           mu         sigma            xi 
  216.8681802 11212.0488049    -0.1898789 
apply(mcmc(post[1:nit %% thinning == 0, ]),2,sd)
          mu        sigma           xi 
101.68289973 737.97773473   0.04013345 

** Fit with MLE for months separately**

#monthly_fits = lapply(monthly_max, 
#                      function(x) fgev(data.frame(x)[,1], method="Nelder-Mead"))
monthly_fits = list()
error_cases = c(9, 12)
for (i in 1:length(monthly_max)) {
  print(i)
  
  if (i %in% error_cases) {
    monthly_fits[[i]] = fgev(as.data.frame(monthly_max[[i]])$V1, 
                             method = "Nelder-Mead",
                             std.err = FALSE)
  }
  else {
    monthly_fits[[i]] = fgev(as.data.frame(monthly_max[[i]])$V1, 
                             method = "Nelder-Mead")
  }
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
optimization may not have succeeded
[1] 10
[1] 11
[1] 12
names(monthly_fits) = names(monthly_max)
gumbel_qq(data.frame(monthly_max[[9]])[,1], "September")

gumbel_qq(data.frame(monthly_max[[12]])[,1], "December")

get_se = function(x, ix) {
  if (is.null(x$std.err)) 0.01
  else x$std.err[ix]
}
mle_loc = unlist(lapply(monthly_fits, function(x) x$estimate[1]))
mle_loc_se = unlist(lapply(monthly_fits, get_se, 1))
mle_scale = unlist(lapply(monthly_fits, function(x) x$estimate[2]))
mle_scale_se = unlist(lapply(monthly_fits, get_se, 2))
mle_shape = unlist(lapply(monthly_fits, function(x) x$estimate[3]))
mle_shape_se = unlist(lapply(monthly_fits, get_se, 3))
plot_w_err = function(x, y, se, title = NULL) {
  max_ix = which.max(y)
  min_ix = which.min(y)
  plot(x, y,
       ylim = c(y[min_ix] - se[min_ix], y[max_ix] + se[max_ix]),
       main = title)
  arrows(x,y-se,x,y+se, code=3, length=0.02, angle = 90)
}
plot_w_err(1:12, mle_loc, mle_loc_se, "Location Parameter as Estimated with Likelihood")

plot_w_err(1:12, mle_scale, mle_scale_se, "Scale Parameter as Estimated with Likelihood")

plot_w_err(1:12, mle_shape, mle_shape_se, "Shape Parameter as Estimated with Likelihood")

** Fit with Bayesian Methods for months separately**

bayes_fitter = function(x, 
                        init = c(1e3, 1e3, 0.1), # Initial values
                        mat = diag(c(10000,10000,100)),
                        psd = c(500,0.1,0.1), # Proposed SDev
                        nit = 3000, # Nb Iterations
                        thinning = 50, # Thinning Factor
                        do_plots = FALSE, # Bool whether to show plot
                        seed = 42 # Seed 
                        ) {
  set.seed(seed)                
  pn = prior.norm(mean=c(0,0,0),cov=mat)
  post = posterior(nit, init=init, prior=pn, lh="gev", data=x, psd=psd)
  
  if(do_plots) {
    MCMC<-mcmc(post[1:nit %% thinning == 0, ]) 
    plot(MCMC) 
  }
  list(posterior = post, 
       acceptance_rate = attr(mcmc(post),"ar"))
}
monthly_bayes_fit = lapply(monthly_max, bayes_fitter, do_plots = TRUE,
                           psd = c(500,0.3,0.3), nit=30000, thinning = 300)

acceptance_rates = lapply(monthly_bayes_fit, function(x) x$acceptance_rate[1,])
print(acceptance_rates)
$jan
   mu sigma    xi total 
 0.16  0.56  0.58  0.44 

$feb
   mu sigma    xi total 
 0.24  0.54  0.44  0.40 

$mar
   mu sigma    xi total 
 0.24  0.32  0.20  0.25 

$apr
   mu sigma    xi total 
 0.23  0.19  0.15  0.19 

$may
   mu sigma    xi total 
 0.24  0.17  0.11  0.17 

$jun
   mu sigma    xi total 
 0.24  0.18  0.12  0.18 

$jul
   mu sigma    xi total 
 0.23  0.12  0.09  0.14 

$aug
   mu sigma    xi total 
 0.24  0.22  0.14  0.20 

$sep
   mu sigma    xi total 
 0.23  0.15  0.11  0.16 

$oct
   mu sigma    xi total 
 0.24  0.38  0.23  0.28 

$nov
   mu sigma    xi total 
 0.24  0.46  0.33  0.34 

$dec
   mu sigma    xi total 
 0.19  0.58  0.53  0.43 

rr # fit the r largest method #rl = rlarg.fit(data) data(venice) venfit <- rlarg.fit(venice[,-1])

$conv
[1] 0

$nllh
[1] 1139.09

$mle
[1] 120.5479027  12.7840265  -0.1129418

$se
[1] 1.36234055 0.54944881 0.01986948

PART 2

rr #plot the maximum as a function of ENSO plot(nino34,m1,main = of Monthly Maxima of PROD vs ENSO,xlab = , ylab = Maxima of PROD)

rr n_years = length(m1)/12 month_indexes = rep(c(1,2,3,4,5,6,7,8,9,10,11,12),n_years) plot(month_indexes,m1,main = Maxima of PROD,xlab = , ylab = Maxima of PROD)

# plot the chi plot for dependance analysis
dat.m1_month = cbind(m1,month_indexes);
dat.m1_nino = cbind(m1,nino34);
chiplot(dat.m1_month);
chiplot(dat.m1_nino);

PART 3 We will now analyse temporal clustering of extremes. For this, we will use the exiplot function from the evd library.

# get sample quantiles (used as plotting thresholds)
prod.quantiles = quantile(prod, c(0.8, 0.85, 0.9, 0.95,0.99,0.999))
exiplot(prod,prod.quantiles,main = "Analyiss of Extremal clustering",xlab = "Threshold", ylab = "Extremal Index")

We observe that the extremal index is ~0.2, we can therefore conclude that we have strong dependance of extremes, with the limiting mean cluster size being roughly 5. The clustering has no effect for estimaters based on the (monthly) maximum, but the r largest estimator is influenced by it.

---
title: "Thunderstrom Analysis"
output: html_notebook
---
**Imports**
```{r}
library(evd); 
library(evdbayes);
library(coda);
library(ismev);
library(xts);

```


**Loading the Data**

```{r}
load("../data/CAPE_Minder_Rychener_Malsot.RData")
load("../data/NINO34.RData")
load("../data/SRH_Minder_Rychener_Malsot.RData")
```

**Generate PROD**
```{r}
prod = sqrt(cape)*srh
```




** Create Time Series Objects **
```{r}
dates <- seq.Date(as.Date("1979-1-1"), as.Date("2015-12-31"), by=1)
feb29ix <- format(as.Date(dates), "%m-%d") == "02-29"
dates <- dates[!feb29ix]

prod_ts = xts(prod, order.by = rep(dates, each=8))
```



**Beginning the Analysis**
```{r}
month_names = c("jan","feb","mar","apr","may","jun","jul","aug","sep","oct","nov","dec")
get_monthly = function(x) {
  output = list()
  len = nrow(x)
  
  # Get month of element
  month = time(x)
  month = gsub("....-", "", month)
  month = gsub("-..", "", month)
  monthlist = unique(month)
  for (i in 1:12) {
    output[[i]] = x[month == monthlist[i],]
  }
  names(output) = month_names
  return(output)
}

monthly_max = get_monthly(apply.monthly(prod_ts, max))
r = 2
r_monthly_max = get_monthly(apply.monthly(prod_ts, function(x) order(x, decreasing=T)[1:r]))
```


```{r}
# get the monthly maxima of prod
m1 = as.data.frame(apply.monthly(prod_ts, max))$V1;
# produce the gumbel qq plot
gumbel_qq = function(x, title="") qqplot(qgumbel(c(1:length(x))/(length(x)+1)),
                                         x,
                                         main = paste("Gumbell Q-Q Plot", title),
                                         xlab = "Theoretical Quantiles", 
                                         ylab = "Sample Quantiles") ;

gumbel_qq(m1)

#qqplot(qgumbel(c(1:length(m1))/(length(m1)+1)),m1,main = "Gumbell Q-Q Plot",xlab = "Theoretical Quantiles", ylab = "Sample Quantiles") ;
```
The qq plot doesn't fit very well, especially in the lower tail. This is likely due
to seasonal dependence.

**Fitting GEV to the entire data**
```{r}
# fit gevd with MLE and produce diagnostic plots
fitmax.MLE<-fgev(m1,method="Nelder-Mead")
par(mfrow=c(2,2))
fitmax.MLE
plot(fitmax.MLE)
```
Poor fit, probably because the distribution isn't stationary. This is especially 
visible in the Probability plot, in which the confidence band is surpassed, 
indicating a poor fit.


```{r}
# fit gevd with Bayesian Techniques
# use the previous outputs (rounded) as initial values (use a different shape)
init<-c(1.6e4,4e3,0.1)
# arbitrary priors
mat <- diag(c(10000,10000,100)) 
pn <- prior.norm(mean=c(0,0,0),cov=mat)
# proposal standard deviation (found by trial and error to get 20%<acceptance rate<40%)
psd<-c(500,0.03,0.02)
# draw 3k samples from markov chain
nit = 30000
thinning = 300
post<-posterior(nit, init=init,prior=pn,lh="gev",data=m1,psd=psd)
# diagnostic plots
MCMC<-mcmc(post[1:nit %% thinning == 0, ], thin=300) 
plot(MCMC) 
attr(mcmc(post),"ar")

```


```{r}
#MCMC_stab <- mcmc(post, thin=50, start=1000)
acf(mcmc(post[1:nit %% thinning == 0, ]))
```
We observe that there seem to be no substantial issues from the autocorrelation 
plots. 

```{r}
apply(mcmc(post[1:nit %% thinning == 0, ]),2,mean)
apply(mcmc(post[1:nit %% thinning == 0, ]),2,sd)

```

** Fit with MLE for months separately**
```{r}
#monthly_fits = lapply(monthly_max, 
#                      function(x) fgev(data.frame(x)[,1], method="Nelder-Mead"))
monthly_fits = list()
error_cases = c(9, 12)
for (i in 1:length(monthly_max)) {
  print(i)
  
  if (i %in% error_cases) {
    monthly_fits[[i]] = fgev(as.data.frame(monthly_max[[i]])$V1, 
                             method = "Nelder-Mead",
                             std.err = FALSE)
  }
  else {
    monthly_fits[[i]] = fgev(as.data.frame(monthly_max[[i]])$V1, 
                             method = "Nelder-Mead")
  }
}
names(monthly_fits) = names(monthly_max)

```

```{r}
gumbel_qq(data.frame(monthly_max[[9]])[,1], "September")
gumbel_qq(data.frame(monthly_max[[12]])[,1], "December")
```
```{r}
get_se = function(x, ix) {
  if (is.null(x$std.err)) 0
  else x$std.err[ix]
}
mle_loc = unlist(lapply(monthly_fits, function(x) x$estimate[1]))
mle_loc_se = unlist(lapply(monthly_fits, get_se, 1))
mle_scale = unlist(lapply(monthly_fits, function(x) x$estimate[2]))
mle_scale_se = unlist(lapply(monthly_fits, get_se, 2))
mle_shape = unlist(lapply(monthly_fits, function(x) x$estimate[3]))
mle_shape_se = unlist(lapply(monthly_fits, get_se, 3))
```

```{r}
plot_w_err = function(x, y, se, title = NULL) {
  max_ix = which.max(y)
  min_ix = which.min(y)
  plot(x, y,
       ylim = c(y[min_ix] - se[min_ix], y[max_ix] + se[max_ix]),
       main = title)
  arrows(x,y-se,x,y+se, code=3, length=0.02, angle = 90)
}
plot_w_err(1:12, mle_loc, mle_loc_se, "Location Parameter as Estimated with Likelihood")
plot_w_err(1:12, mle_scale, mle_scale_se, "Scale Parameter as Estimated with Likelihood")
plot_w_err(1:12, mle_shape, mle_shape_se, "Shape Parameter as Estimated with Likelihood")

```

** Fit with Bayesian Methods for months separately**
```{r}

bayes_fitter = function(x, 
                        init = c(1e3, 1e3, 0.1), # Initial values
                        mat = diag(c(10000,10000,100)),
                        psd = c(500,0.1,0.1), # Proposed SDev
                        nit = 3000, # Nb Iterations
                        thinning = 50, # Thinning Factor
                        do_plots = FALSE, # Bool whether to show plot
                        seed = 42 # Seed 
                        ) {
  set.seed(seed)                
  pn = prior.norm(mean=c(0,0,0),cov=mat)
  post = posterior(nit, init=init, prior=pn, lh="gev", data=x, psd=psd)
  
  if(do_plots) {
    MCMC<-mcmc(post[1:nit %% thinning == 0, ]) 
    plot(MCMC) 
  }
  list(posterior = post, 
       acceptance_rate = attr(mcmc(post),"ar"))
}

monthly_bayes_fit = lapply(monthly_max, bayes_fitter, do_plots = TRUE,
                           psd = c(500,0.3,0.3), nit=30000, thinning = 300)
acceptance_rates = lapply(monthly_bayes_fit, function(x) x$acceptance_rate[1,])
print(acceptance_rates)

```



```{r}
# fit the r largest method
#rl = rlarg.fit(data)
data(venice)
venfit <- rlarg.fit(venice[,-1])
```


**PART 2**
```{r}
#plot the maximum as a function of ENSO
plot(nino34,m1,main = "Comparison of Monthly Maxima of PROD vs ENSO",xlab = "ENSO", ylab = "Monthly Maxima of PROD")
n_years = length(m1)/12
month_indexes = rep(c(1,2,3,4,5,6,7,8,9,10,11,12),n_years)
plot(month_indexes,m1,main = "Monthly Maxima of PROD",xlab = "Month", ylab = "Monthly Maxima of PROD")
```
```{r}
# plot the chi plot for dependance analysis
dat.m1_month = cbind(m1,month_indexes);
dat.m1_nino = cbind(m1,nino34);
chiplot(dat.m1_month);
chiplot(dat.m1_nino);
```



**PART 3**
We will now analyse temporal clustering of extremes. For this, we will use the exiplot function from the evd library.

```{r}
# get sample quantiles (used as plotting thresholds)
prod.quantiles = quantile(prod, c(0.8, 0.85, 0.9, 0.95,0.99,0.999))
exiplot(prod,prod.quantiles,main = "Analyiss of Extremal clustering",xlab = "Threshold", ylab = "Extremal Index")
```

We observe that the extremal index is ~0.2, we can therefore conclude that we have strong dependance of extremes, with the limiting mean cluster size being roughly 5. The clustering has no effect for estimaters based on the (monthly) maximum, but the r largest estimator is influenced by it.
